# Install fastDummies only when it is not already available, so that re-running
# the script does not re-download the package on every execution (and does not
# fail when there is no network access or no CRAN mirror configured).
if (!requireNamespace("fastDummies", quietly = TRUE)) {
  install.packages("fastDummies")
}
library(fastDummies)
library(tidyverse)  # ggplot(), %>%, mutate(), and friends 
library(broom)
library(MatchIt)  # Match things
library(Rcpp)
library(MASS)
library(modelsummary)
library(IRdisplay)
library(haven)
library(sjmisc)
library(dplyr)

# Load the raw data set.
# The file path is relative, so the script must be run from the directory that
# contains SHR76_20.csv; getwd() prints the directory actually in use.
# No setwd() call is made here: setwd("") always fails with
# "cannot change working directory", which aborts a non-interactive run, and
# hard-coding a machine-specific path would make the script non-portable.
getwd()

df <- read_csv("SHR76_20.csv")

### Homicide overlap variables
# Year2               : copy of Year.
# Code_Overlap        : "_"-joined key of State, Agency, Year2 and Month.
# Code_Overlap_Agency : same key with Agentype added after Agency.
# Monthly_Agency_Overlap : 1 when the Code_Overlap_Agency key has already
#   appeared in an earlier row, 0 otherwise (the first row of each key is 0,
#   because duplicated() only flags repeats).
df <- df %>%
  mutate(
    Year2 = Year,
    Code_Overlap = paste(State, Agency, Year2, Month, sep = "_"),
    Code_Overlap_Agency = paste(State, Agency, Agentype, Year2, Month, sep = "_"),
    Monthly_Agency_Overlap = as.numeric(duplicated(Code_Overlap_Agency))
  )

# Tabulate the flag to check the number of overlapping homicides.
table(df$Monthly_Agency_Overlap)

# Shift the victim and offender count columns up by one.
# NOTE(review): presumably the raw columns record *additional* victims /
# offenders, so that +1 yields totals -- confirm against the codebook.
df <- df %>%
  mutate(across(c(VicCount, OffCount), ~ .x + 1))

# Decade label derived from Year.
# Year is compared against numeric bounds; comparing it with quoted literals
# would coerce Year to character and compare lexicographically, which only
# happens to work for four-digit years.
# Rows whose Year falls outside 1970-2020 (or is missing) get "undetermined".
df <- df %>%
  mutate(
    Decade = case_when(
      Year >= 1970 & Year < 1980 ~ "70s",
      Year >= 1980 & Year < 1990 ~ "80s",
      Year >= 1990 & Year < 2000 ~ "90s",
      Year >= 2000 & Year < 2010 ~ "00s",
      # Upper bound is inclusive: 2020 is folded into the "10s" bucket.
      Year >= 2010 & Year <= 2020 ~ "10s",
      TRUE ~ "undetermined"
    )
  )


# Five-year period label derived from Year.
# Year is compared against numeric bounds; comparing it with quoted literals
# would coerce Year to character and compare lexicographically, which only
# happens to work for four-digit years.
# Rows whose Year falls outside 1970-2020 (or is missing) get "undetermined".
df <- df %>%
  mutate(
    FiveY = case_when(
      Year >= 1970 & Year < 1975 ~ "1970-1974",
      Year >= 1975 & Year < 1980 ~ "1975-1979",
      Year >= 1980 & Year < 1985 ~ "1980-1984",
      Year >= 1985 & Year < 1990 ~ "1985-1989",
      Year >= 1990 & Year < 1995 ~ "1990-1994",
      Year >= 1995 & Year < 2000 ~ "1995-1999",
      Year >= 2000 & Year < 2005 ~ "2000-2004",
      Year >= 2005 & Year < 2010 ~ "2005-2009",
      Year >= 2010 & Year < 2015 ~ "2010-2014",
      # Last bucket spans six years: its upper bound (2020) is inclusive.
      Year >= 2015 & Year <= 2020 ~ "2015-2020",
      TRUE ~ "undetermined"
    )
  )

# Victim age category (AgeCat).
# VicAge is binned into right-closed five-year intervals:
#   [0, 5] -> "0-5", (5, 10] -> "6-10", ..., (90, 95] -> "91-95",
#   (95, 99] -> "96-99".
# cut() returns NA for anything outside [0, 99] and for missing ages; those
# rows fall back to the label "999".
df <- df %>%
  mutate(
    AgeCat = dplyr::coalesce(
      as.character(
        cut(
          VicAge,
          breaks = c(seq(0, 95, by = 5), 99),
          labels = paste(
            c(0, seq(6, 96, by = 5)),
            c(seq(5, 95, by = 5), 99),
            sep = "-"
          ),
          include.lowest = TRUE, # closes the first interval on the left: [0, 5]
          right = TRUE
        )
      ),
      "999"
    )
  )

# Remove observations whose victim age is unknown (VicAge coded as 999).
# The comparison is numeric rather than against the string "999", so it does
# not depend on implicit number-to-character coercion. Rows with a missing
# VicAge are dropped as well, because filter() keeps only rows where the
# condition is TRUE.
df <- df %>%
  filter(VicAge != 999)


# Create dummy (indicator) columns for VicRace and Solved.
# remove_selected_columns = TRUE drops the two source columns once their
# dummies have been generated.
df <- df %>%
  dummy_cols(
    select_columns = c("VicRace", "Solved"),
    remove_selected_columns = TRUE
  )

# Drop the columns that are not needed in the exported data set.
# select() is namespaced because MASS, attached above, also exports a select().
# Every listed column must exist in df; a missing one raises an error.
df2 <- df %>%
  dplyr::select(
    -dplyr::all_of(c(
      "Year", "ID", "FileDate", "MSA",
      "Code_Overlap", "Code_Overlap_Agency",
      "VicAge", "Solved_No",
      "CNTYFIPS", "Ori", "Agency", "Source", "StateName",
      "Incident", "ActionType", "Situation", "VicEthnic",
      "OffAge", "OffEthnic", "Subcircum"
    ))
  )

# Export the cleaned data to CSV in the working directory; row names are
# written as the first (unnamed) column.
write.csv(x = df2, file = "dataset76_20.csv", row.names = TRUE)
